#import the required libraries
import random
random.seed(0)
# Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization
from tensorflow.keras.layers import UpSampling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D, Dropout, Flatten
from PIL import Image
from numpy import asarray
%tensorflow_version 2.x
import tensorflow
tensorflow.__version__
from google.colab.patches import cv2_imshow
# Load the Drive helper and mount
from google.colab import drive
from sklearn.model_selection import train_test_split
from zipfile import ZipFile
• DOMAIN: Entertainment
• CONTEXT: Company X owns a movie application and repository which caters movie streaming to millions of users on a subscription basis. The company wants to automate the process of providing cast and crew information for each scene in a movie, so that when a user pauses the movie and clicks the cast information button, the app shows details of the actors in the scene. The company has in-house computer vision and multimedia experts who need to detect faces in screenshots from movie scenes.
• DATA DESCRIPTION: The dataset comprises of images and its mask where there is a human face.
• PROJECT OBJECTIVE: Face detection from training images.
Steps and tasks:
# This will prompt for authorization.
drive.mount('/content/drive/')
# Colab shell/magic commands: list the project folder, then change into it.
!ls "/content/drive/MyDrive/Colab Notebooks/Adv_CV2"
cd "/content/drive/MyDrive/Colab Notebooks/Adv_CV2"
# Loading the images file
# allow_pickle is needed because each entry bundles an image with its
# annotation objects; only safe because this file is our own.
data = np.load('Part 1- Train data - images.npy', allow_pickle=True)
print('size:',data.size)
print('shape', data.shape)
# Preview one sample; data[i][0] is the image (annotations live in data[i][1]).
# NOTE(review): assumes the array holds at least 201 samples — confirm.
cv2_imshow(data[200][0])
# Preview 30 training images in a 10x3 grid, sampling every 12th entry.
fig, axes = plt.subplots(10, 3, figsize=(20, 30))
for plot_num in range(30):
    grid_row, grid_col = divmod(plot_num, 3)
    axes[grid_row][grid_col].imshow(data[plot_num * 12][0], interpolation='nearest')
plt.show()
#Declare a variable IMAGE_SIZE = 224 as we will be taking Input shape as 224 * 224
IMAGE_SIZE = 224
# Binary face masks (single channel) and preprocessed RGB inputs per sample.
masks = np.zeros((int(data.shape[0]), IMAGE_SIZE, IMAGE_SIZE))
X = np.zeros((int(data.shape[0]),IMAGE_SIZE, IMAGE_SIZE, 3))
for index in range(data.shape[0]):
    img = data[index][0]
    image = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    #only taking first 3 channel of each image
    try:
        image = image[:,:,:3]
    except IndexError:
        # Grayscale images are 2-D, so the 3-index slice above raises.
        print(f"Number {index} is Grayscale image, converting to RGB to make it standard in data")
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        # BUG FIX: the original `continue` here skipped the rest of the loop
        # body, leaving X[index] and masks[index] all-zero for every
        # grayscale image even after converting it. Fall through instead.
    X[index] = preprocess_input(np.array(image, dtype=np.float32)) # Convert to float32 array
    # Each annotation gives two corner points with coordinates normalised to
    # [0, 1]; scale to pixel coordinates and fill the face box with ones.
    for i in data[index][1]:
        x1 = int(i['points'][0]['x'] * IMAGE_SIZE)
        x2 = int(i['points'][1]['x'] * IMAGE_SIZE)
        y1 = int(i['points'][0]['y'] * IMAGE_SIZE)
        y2 = int(i['points'][1]['y'] * IMAGE_SIZE)
        masks[index][y1:y2, x1:x2] = 1
print(f"Shape of X is '{X.shape}' and the shape of mask dataset is '{masks.shape}' ")
#split the dataset
# FIX: the file seeds `random.seed(0)` for reproducibility, but
# train_test_split draws from numpy's RNG, so an explicit random_state is
# required for deterministic splits. 80% train; the remaining 20% is split
# again into ~16% validation and ~4% test.
X_train, X_test, y_train, y_test = train_test_split(X, masks, test_size=0.2, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.2, random_state=0)
print(f"Shape of X_train is '{X_train.shape}' and the shape of y_train is '{y_train.shape}'")
print(f"Shape of X_val is '{X_val.shape}' and the shape of y_val is '{y_val.shape}'")
print(f"Shape of X_test is '{X_test.shape}' and the shape of y_test is '{y_test.shape}'")
#view a train image
# Show four preprocessed training images in one row.
# NOTE(review): set_clim only affects scalar (single-channel) images, so on
# these 3-channel inputs it is likely a no-op — confirm intent. Likewise
# cmap='gray' is ignored for RGB data.
fig = plt.figure(figsize=(15, 15))
a = fig.add_subplot(1, 4, 1)
imgplot = plt.imshow(X_train[0], cmap='gray')
a = fig.add_subplot(1, 4, 2)
imgplot = plt.imshow(X_train[10])
imgplot.set_clim(0.0, 0.7)
a = fig.add_subplot(1, 4, 3)
imgplot = plt.imshow(X_train[20])
imgplot.set_clim(0.0, 1.4)
a = fig.add_subplot(1, 4, 4)
imgplot = plt.imshow(X_train[30])
imgplot.set_clim(0.0, 2.1)
# Show the corresponding binary face masks for the same four indices.
fig = plt.figure(figsize=(15, 15))
a = fig.add_subplot(1, 4, 1)
imgplot = plt.imshow(y_train[0])
a = fig.add_subplot(1, 4, 2)
imgplot = plt.imshow(y_train[10])
imgplot.set_clim(0.0, 0.7)
a = fig.add_subplot(1, 4, 3)
imgplot = plt.imshow(y_train[20])
imgplot.set_clim(0.0, 1.4)
a = fig.add_subplot(1, 4, 4)
imgplot = plt.imshow(y_train[30])
imgplot.set_clim(0.0, 1.4)
def create_model(trainable=True):
    """Build a U-Net-style face-segmentation network on a MobileNetV2 encoder.

    The encoder is MobileNetV2 (ImageNet weights, width multiplier 0.35)
    truncated at `block_13_expand_relu`; the decoder upsamples four times,
    concatenating encoder feature maps as skip connections, and ends in a
    single-channel sigmoid mask head.

    Args:
        trainable: when False, freeze the encoder weights. BUG FIX: this
            parameter was previously accepted but ignored. The default True
            preserves the original behavior (entire network trainable).

    Returns:
        A tf.keras Model mapping (IMAGE_SIZE, IMAGE_SIZE, 3) images to
        (IMAGE_SIZE, IMAGE_SIZE, 1) per-pixel face probabilities.
    """
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")
    ALPHA = 0.35  # Width hyper parameter for MobileNet (0.25, 0.5, 0.75, 1.0). Higher width means more accurate but slower
    # Do not include classification (top) layer
    model = MobileNetV2(input_tensor=inputs, weights="imagenet", include_top=False, alpha=ALPHA)
    # Honor the `trainable` flag (no-op when True, the default).
    model.trainable = trainable
    # Encoder layers whose outputs feed the decoder skip connections.
    skip_connection_names = ["input_image", "block_1_expand_relu", "block_3_expand_relu", "block_6_expand_relu"]
    encoder_output = model.get_layer("block_13_expand_relu").output
    # Decoder filter counts, consumed deepest-first below.
    f = [16, 32, 48, 64]
    x = encoder_output
    for i in range(1, len(skip_connection_names)+1, 1):
        x_skip = model.get_layer(skip_connection_names[-i]).output
        x = UpSampling2D((2, 2))(x)
        x = Concatenate()([x, x_skip])
        # Two conv -> batch-norm -> ReLU blocks per decoder stage.
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    # 1x1 conv + sigmoid: per-pixel face probability.
    x = Conv2D(1, (1, 1), padding="same")(x)
    x = Activation("sigmoid")(x)
    model = Model(inputs, x)
    return model
# Instantiate the segmentation model and print its layer summary.
model = create_model()
model.summary()
smooth = 1e-15  # smoothing term: avoids division by zero on empty masks
def dice_coefficient(y_true, y_pred):
    """Dice similarity between a ground-truth mask and a predicted mask.

    Both tensors are flattened per batch, then 2*|A∩B| / (|A|+|B|) is
    computed over all elements, smoothed to stay finite for empty masks.
    """
    flat_true = tf.keras.layers.Flatten()(y_true)
    flat_pred = tf.keras.layers.Flatten()(y_pred)
    overlap = tf.reduce_sum(flat_true * flat_pred)
    total = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred)
    return (2. * overlap + smooth) / (total + smooth)
#Define loss function
def loss(y_true, y_pred):
    """Dice loss: 1 minus the Dice coefficient, minimised during training."""
    return 1.0 - dice_coefficient(y_true, y_pred)
Compile the model: define the optimizer, the metrics, and the loss function.
# FIX: use the TF2 argument name `learning_rate` (`lr` is deprecated) and
# drop `epsilon=None`/`decay=0.0`, which only restated/approximated the
# defaults via deprecated parameters.
optimizer = Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, amsgrad=False)
# Track Dice, recall and precision alongside the Dice loss.
model.compile(loss=loss, optimizer=optimizer, metrics=[dice_coefficient, Recall(), Precision()])
EPOCHS = 32
BATCH = 10
callbacks = [
    # Shrink the learning rate 10x when validation loss stalls for 4 epochs.
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)
]
# BUG FIX: the original computed train/valid step counts assuming BATCH=10
# but never passed batch_size to fit(), so Keras batched the NumPy arrays
# with its default of 32 and the step counts overran the data each epoch.
# Passing batch_size and letting Keras derive the step counts (a full pass,
# including the partial final batch) keeps the two consistent.
model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH,
    callbacks=callbacks
)
#evaluate the model
# BUG FIX: `steps` was computed for BATCH=10 while evaluate() batched with
# its default of 32, so the step count overran the data. Passing batch_size
# and letting Keras derive the step count evaluates exactly one full pass.
model.evaluate(X_test, y_test, batch_size=BATCH)
The model has been trained and evaluated; next, run a prediction on a sample test image.
filename = 'Part 1Test Data - Prediction Image.jpeg'
unscaled = cv2.imread(filename)
# Resize to the model's 224x224 input size and apply MobileNet preprocessing.
image = cv2.resize(unscaled, (224, 224))
feat_scaled = preprocess_input(np.array(image, dtype=np.float32))
print(feat_scaled)
# Predict a face mask for a batch of one image.
y_pred = model.predict(np.array([feat_scaled]))
y_pred
# Threshold the sigmoid output at 0.5 to obtain a binary mask.
pred_mask = cv2.resize((1.0*(y_pred[0] > 0.5)), (224,224))
Viewing the predicted image
plt.imshow(feat_scaled)
# NOTE(review): this second imshow draws over the first on the same axes,
# so only the mask is visible — use subplots if both should be shown.
plt.imshow(pred_mask)
We are able to identify the face in the given pictures correctly.
# opening the zip file in READ mode
# FIX: the context-manager variable was named `zip`, shadowing the builtin.
with ZipFile("Part 3 - Aligned Face Dataset from Pinterest.zip", 'r') as archive:
    # Extract the whole aligned-face dataset into the working directory.
    archive.extractall()
    print('Done!')
directory = '/content/drive/MyDrive/Colab Notebooks/Adv_CV2/PINS/'
class IdentityMetadata():
    """Location of one face image on disk: dataset root, identity, file name."""

    def __init__(self, base, name, file):
        self.base = base  # dataset root directory
        self.name = name  # identity (person) sub-directory
        self.file = file  # image file name within the identity directory

    def __repr__(self):
        # Represent the record by the path it points at.
        return self.image_path()

    def image_path(self):
        """Return the full path to the image file."""
        return os.path.join(self.base, self.name, self.file)
def load_metadata(path):
    """Scan `path` for identity sub-directories and collect their images.

    Returns a numpy object array of IdentityMetadata, one entry per
    jpg/jpeg file found one level below `path`.
    """
    records = []
    for identity in os.listdir(path):
        for fname in os.listdir(os.path.join(path, identity)):
            # Check file extension. Allow only jpg/jpeg' files.
            if os.path.splitext(fname)[1] in ('.jpg', '.jpeg'):
                records.append(IdentityMetadata(path, identity, fname))
    return np.array(records)

metadata = load_metadata('PINS')
def load_image(path):
    """Read an image from disk and return it in RGB channel order."""
    bgr = cv2.imread(path, 1)
    # OpenCV loads color images in BGR order; flip the channel axis to RGB.
    return bgr[..., ::-1]

print(metadata.shape)
metadata
Load a few sample images
# Spot-check two identities from the extracted dataset.
image = load_image("PINS/pins_zendaya/zendaya99.jpg")
print(image.shape)
plt.imshow(image)
image2 = load_image("PINS/pins_Aaron Paul/Aaron Paul101_248.jpg")
print(image2.shape)
plt.imshow(image2)
def vgg_face():
    """Build the VGG-Face CNN: five padded-conv stages followed by a
    fully-convolutional classifier head over 2622 identities.

    The layer sequence matches the published architecture exactly, so
    pre-trained weight files load layer-for-layer.
    """
    model = Sequential()
    # (filters, conv layers) per stage; every conv is preceded by 1-pixel
    # zero padding and every stage ends with a 2x2/stride-2 max-pool.
    stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    first_layer = True
    for filters, conv_count in stages:
        for _ in range(conv_count):
            if first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1), input_shape=(224,224, 3)))
                first_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2,2), strides=(2,2)))
    # Fully-convolutional classifier head.
    model.add(Convolution2D(4096, (7, 7), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(4096, (1, 1), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation('softmax'))
    return model
model = vgg_face()
# Load pre-trained VGG-Face weights; the architecture must match layer-for-layer.
model.load_weights("/content/drive/MyDrive/Colab Notebooks/Adv_CV2/Part 3 - vgg_face_weights.h5")
Get vgg_face_descriptor
# Drop the final softmax: the flattened 2622-D output of the second-to-last
# layer serves as the face embedding (descriptor) used for recognition.
vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)
Generate embeddings for each image in the dataset
# Get embedding vector for first image in the metadata using the pre-trained model
img_path = metadata[0].image_path()
img = load_image(img_path)
# Normalising pixel values from [0-255] to [0-1]: scale RGB values to interval [0,1]
img = (img / 255.).astype(np.float32)
img = cv2.resize(img, dsize = (224,224))
print(img.shape)
# Obtain embedding vector for an image
# Get the embedding vector for the above image using vgg_face_descriptor model and print the shape
# expand_dims adds the batch axis; indexing [0] strips it from the output.
embedding_vector = vgg_face_descriptor.predict(np.expand_dims(img, axis=0))[0]
print(embedding_vector.shape)
Generate embeddings for all images
# One 2622-D descriptor per image in the dataset.
embeddings = np.zeros((metadata.shape[0], 2622))
for idx, record in enumerate(metadata):
    face = load_image(record.image_path())
    face = (face/255.).astype(np.float32)  # scale pixels to [0, 1]
    face = cv2.resize(face, dsize=(224,224))
    # One forward pass per image; batching would be faster but costs memory.
    embeddings[idx] = vgg_face_descriptor.predict(np.expand_dims(face, axis=0))[0]
embeddings
#Function to calculate distance between given 2 pairs of images.
def distance(emb1, emb2):
    """Squared Euclidean distance between two embedding vectors."""
    diff = emb1 - emb2
    return np.sum(diff * diff)
Plot images and get distance between the pairs given below
import matplotlib.pyplot as plt

def show_pair(idx1, idx2):
    """Plot two dataset images side by side, titled with their embedding distance."""
    plt.figure(figsize=(8,3))
    plt.suptitle(f'Distance = {distance(embeddings[idx1], embeddings[idx2]):.2f}')
    for slot, idx in zip((121, 122), (idx1, idx2)):
        plt.subplot(slot)
        plt.imshow(load_image(metadata[idx].image_path()))
# Compare embedding distances for image pairs; adjacent indices are
# presumably the same identity and distant indices different identities —
# TODO confirm against the dataset ordering.
show_pair(2, 3)
show_pair(2, 180)
show_pair(30,31)
show_pair(30,100)
show_pair(70,72)
show_pair(70,115)
Create train and test sets
train_idx = np.arange(metadata.shape[0]) % 9 != 0 #every 9th example goes in test data and rest go in train data
test_idx = np.arange(metadata.shape[0]) % 9 == 0
# ~8/9 of the embeddings (all identities) serve as training examples
X_train = embeddings[train_idx]
# the remaining ~1/9 serve as test examples
X_test = embeddings[test_idx]
# Labels are the identity (sub-directory) names from the metadata records.
targets = np.array([m.name for m in metadata])
#train labels
y_train = targets[train_idx]
#test labels
y_test = targets[test_idx]
print(X_train[0])
print(y_train[0])
np.unique(y_train)
np.unique(y_test)
Encode the Labels
# Encode string identity labels as integers; fit on the training labels so
# the class mapping is defined by the training split.
le = LabelEncoder()
le.fit(y_train)
y_train = le.transform(y_train)
# NOTE(review): transform raises if a test identity never appears in
# training — the every-9th split makes this unlikely but not impossible.
y_test = le.transform(y_test)
Standardize the feature values
# Standarize features
# Fit mean/std on the training data only, then apply to both splits to
# avoid leaking test-set statistics into the transform.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
Reduce dimensions using PCA
from sklearn.decomposition import PCA
# Project the 2622-D embeddings down to 128 dimensions (fit on train only).
pca = PCA(n_components=128)
pca.fit(X_train)
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)
Build a Classifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Train an SVM (SVC defaults) on the PCA-reduced embeddings and report
# test-set accuracy.
svc = SVC()
svc.fit(X_train,y_train)
y_pred = svc.predict(X_test)
print(accuracy_score(y_test,y_pred))
Test results
import warnings
# Suppress LabelEncoder warning
warnings.filterwarnings('ignore')
# Sanity-check the pipeline on one image from the held-out test split.
example_idx =200
# NOTE(review): metadata[test_idx] holds only ~1/9 of the images; index 200
# assumes at least 201 test examples — confirm the dataset is large enough.
example_image = load_image(metadata[test_idx][example_idx].image_path())
image = (example_image/255.).astype(np.float32)
image = cv2.resize(image, dsize=(224,224))
# Embed with VGG-Face, then apply the scaler and PCA fitted on training data.
vector = vgg_face_descriptor.predict(np.expand_dims(image,axis=0))[0]
vector = np.reshape(vector,(1,-1))
scaled_vector = scaler.transform(vector)
pca_transformed = pca.transform(scaled_vector)
example_prediction = svc.predict(pca_transformed)
# Map the numeric SVC prediction back to the identity name.
example_identity = le.inverse_transform(example_prediction)[0]
plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');
def _identify_and_show(image_file):
    """Run the full recognition pipeline on one image and display the result.

    Pipeline: load (RGB) -> scale to [0, 1] -> resize to 224x224 ->
    VGG-Face embedding -> StandardScaler -> PCA -> SVC -> label decode,
    then plot the original image titled with the predicted identity.
    (Extracted to remove the two copy-pasted cell bodies.)
    """
    example_image = load_image(image_file)
    image = (example_image/255.).astype(np.float32)
    image = cv2.resize(image, dsize=(224,224))
    vector = vgg_face_descriptor.predict(np.expand_dims(image,axis=0))[0]
    vector = np.reshape(vector,(1,-1))
    scaled_vector = scaler.transform(vector)
    pca_transformed = pca.transform(scaled_vector)
    example_prediction = svc.predict(pca_transformed)
    example_identity = le.inverse_transform(example_prediction)[0]
    plt.imshow(example_image)
    plt.title(f'Identified as {example_identity}');

## predict first test image
_identify_and_show("Part 2 - Test Image - Dwayne Johnson4.jpg")
## predict second test image
_identify_and_show("Part 2- Test Image - Benedict Cumberbatch9.jpg")
Able to predict the face correctly for both the test images.